# install.packages(c("snow", "numDeriv"))
library(snow) # For running on MPI server (or locally) in parallel 
library(numDeriv) # If running on MPI server, install on server also

# Location of Functions.R
workingDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"
# Location to output csvs from simulations
outputDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"

setwd(workingDirectory)
source("Functions.R")

# NOTE: Set the wallclock on compute server to 24 hours for each set of
# 1000 simulations
# Takes roughly 14 hours to run 1000 simulations on 32 cores on Colosse

########## SIMULATION STUDY 1

study1 <- function(iterate) {
    library(numDeriv)

    n <- 3000
    J <- 4 # Number of control items
    x <- as.matrix(rep(1, n)) # Intercept-only model

    y <- rbinom(n, J, 0.5) # i.e. plogis(0)
    treat <- sample(rep(c(0, 1), each = n/2)) # Half treatment, half control
    true.belief <- rbinom(n, 1, 0.5) # 0.5 hold sensitive belief (intercept = 0)
    y[treat == 1] <- y[treat == 1] + true.belief[treat == 1]

    model.sim.standard <- listExperiment(y ~ 1,
                                         data = data.frame(y, treat),
                                         treatment = "treat", J = J,
                                         sensitive.response = 1,
                                         verbose = FALSE,
                                         control.constrained = "none")

    # 10% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.1)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.10 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 20% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.2)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.20 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 30% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.3)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.30 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 40% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.4)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.40 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 50% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.5)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.50 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 60% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.6)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.60 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 70% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.7)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.70 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 80% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.8)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.80 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    # 90% misreport
    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = 0.9)
    direct[liar != 1 & true.belief == 1] <- 1

    model.sim.proposed.90 <- listExperiment(y ~ 1,
                                            data = data.frame(y, treat, direct),
                                            treatment = "treat", J = J,
                                            direct = "direct",
                                            sensitive.response = 1,
                                            include.misreport.treatment = FALSE,
                                            verbose = FALSE,
                                            control.constrained = "partial")

    return(list(model.sim.standard = model.sim.standard,
                model.sim.proposed.10 = model.sim.proposed.10,
                model.sim.proposed.20 = model.sim.proposed.20,
                model.sim.proposed.30 = model.sim.proposed.30,
                model.sim.proposed.40 = model.sim.proposed.40,
                model.sim.proposed.50 = model.sim.proposed.50,
                model.sim.proposed.60 = model.sim.proposed.60,
                model.sim.proposed.70 = model.sim.proposed.70,
                model.sim.proposed.80 = model.sim.proposed.80,
                model.sim.proposed.90 = model.sim.proposed.90))
}

n_cores <- as.numeric(Sys.getenv("MOAB_PROCCOUNT"))
CL <- makeCluster(n_cores, type = "MPI")
# If replicating on personal computer instead of MPI cluster:
# n_cores <- 4 # Number of threads/cores on which to run simulations
# CL <- makeCluster(n_cores, type = "SOCK")


# Note that this file was run multiple times on an MPI cluster to generate
# more than the 1000 simulations listed here. Output includes time-stamped
# file names to permit multiple runs and csvs which are then combined
# in the file SimStudyGraphs.R
n_sims <- 1000

# Export necessary functions to cluster
clusterExport(CL, list("logAdd", "mstepMisreport", "mstepSensitive",
                       "mstepControl", "mstepOutcome", "estep",
                       "listExperiment", "study1"))

# Run simluations
sim.out <- parLapply(CL, 1:n_sims, study1)

# Convert output to lists of model objects
model.sim.standard    <- sapply(sim.out, function(X) X$model.sim.standard, simplify = FALSE)
model.sim.proposed.10 <- sapply(sim.out, function(X) X$model.sim.proposed.10, simplify = FALSE)
model.sim.proposed.20 <- sapply(sim.out, function(X) X$model.sim.proposed.20, simplify = FALSE)
model.sim.proposed.30 <- sapply(sim.out, function(X) X$model.sim.proposed.30, simplify = FALSE)
model.sim.proposed.40 <- sapply(sim.out, function(X) X$model.sim.proposed.40, simplify = FALSE)
model.sim.proposed.50 <- sapply(sim.out, function(X) X$model.sim.proposed.50, simplify = FALSE)
model.sim.proposed.60 <- sapply(sim.out, function(X) X$model.sim.proposed.60, simplify = FALSE)
model.sim.proposed.70 <- sapply(sim.out, function(X) X$model.sim.proposed.70, simplify = FALSE)
model.sim.proposed.80 <- sapply(sim.out, function(X) X$model.sim.proposed.80, simplify = FALSE)
model.sim.proposed.90 <- sapply(sim.out, function(X) X$model.sim.proposed.90, simplify = FALSE)

# Function to pull required parameter estimates and standard errors from each model
study1Pars <- function(X, modelName, proportion = NA) {
  par.control.intercept <- sapply(X, function(X) X$par.control["(Intercept)"])
  se.control.intercept <- sapply(X, function(X) X$se.control["(Intercept)"])
  par.control.z <- sapply(X, function(X) X$par.control["Z == 1"])
  se.control.z <- sapply(X, function(X) X$se.control["Z == 1"])

  par.sensitive <- sapply(X, function(X) X$par.sensitive)
  se.sensitive <- sapply(X, function(X) X$se.sensitive)

  par.misreport <- sapply(X, function(X) X$par.misreport)
  se.misreport <- sapply(X, function(X) X$se.misreport)

  if(is.list(par.misreport)) {
    par.misreport <- rep(NA, length(par.misreport))
    se.misreport <- rep(NA, length(se.misreport))
  }

  Out <- data.frame(model = modelName,
                    proportion = proportion,
                    par.control.intercept = par.control.intercept,
                    se.control.intercept = se.control.intercept,
                    par.control.z = par.control.z,
                    se.control.z = se.control.z,
                    par.sensitive = par.sensitive,
                    se.sensitive = se.sensitive, 
                    par.misreport = par.misreport,
                    se.misreport = se.misreport)
  row.names(Out) <- 1:nrow(Out)
  return(Out)
}

sims.standard <-     study1Pars(model.sim.standard,    model = "Standard estimator")
sims.proposed.10 <-  study1Pars(model.sim.proposed.10, model = "Proposed estimator", proportion = 0.10)
sims.proposed.20 <-  study1Pars(model.sim.proposed.20, model = "Proposed estimator", proportion = 0.20)
sims.proposed.30 <-  study1Pars(model.sim.proposed.30, model = "Proposed estimator", proportion = 0.30)
sims.proposed.40 <-  study1Pars(model.sim.proposed.40, model = "Proposed estimator", proportion = 0.40)
sims.proposed.50 <-  study1Pars(model.sim.proposed.50, model = "Proposed estimator", proportion = 0.50)
sims.proposed.60 <-  study1Pars(model.sim.proposed.60, model = "Proposed estimator", proportion = 0.60)
sims.proposed.70 <-  study1Pars(model.sim.proposed.70, model = "Proposed estimator", proportion = 0.70)
sims.proposed.80 <-  study1Pars(model.sim.proposed.80, model = "Proposed estimator", proportion = 0.80)
sims.proposed.90 <-  study1Pars(model.sim.proposed.90, model = "Proposed estimator", proportion = 0.90)

SS1 <- rbind(sims.standard,
             sims.proposed.10, sims.proposed.20, sims.proposed.30,
             sims.proposed.40, sims.proposed.50, sims.proposed.60,
             sims.proposed.70, sims.proposed.80, sims.proposed.90)

# Increase Sys.time() accuracy to 6 digits to avoid filename overlap
options(digits.secs = 6)
write.csv(SS1, paste0(outputDirectory, "/Sim1-", gsub(":|\\.", "-", Sys.time()), ".csv"), row.names = FALSE)

stopCluster(CL)
